Authors : Tracy Rabilloud*, Delphine Potier*, Saran Pankew, Mathis Nozais, Marie Loosveld§, Dominique Payet-Bornet§
* Equal contribution
§ Corresponding authors
PMID: to come
Raw data and intermediate data matrices are available in SRA/GEO (SRP269742 / GSE153697)
Docker images and intermediate seurat object are available in zenodo. Any questions on this analysis, please contact Delphine Potier
In the CaSpER paper figure 4C (see https://www.nature.com/articles/s41467-019-13779-x), a Ɣ threshold of 9 correspond to a thr of 1 in the extractLargeScaleEvents() function. Indeed, in the function, mergeScalesDel variable is divided by the object length which is 9 (9/9 = 1) (“finalChrMat[(mergeScalesDel/length(final.objects)) >= thr] <- (-1)”)
# Plot 9q deletion score
## Clones of interest : Get barcodes for T1-CD19neg clones located in the T1 tumoral area (Cluster 0) for highlighting
Idents(Seurat) <- "RNA_snn_res.0.1"
HTO_T1CD19n_in_tumor <- intersect(row.names(subset(Seurat@meta.data, hash.ID == "T1-CD19neg")), WhichCells(Seurat, slot = "RNA_snn_res.0.1", idents = c("0","1")))
## Add 9q status in the metadata
Seurat@meta.data$chr9q <- "nothing"
Seurat@meta.data$chr9q <- finalChrMat[,"9q"]
## Draw UMAP
### Get the coordinates & values
Umap<-data.frame(
UMAP_1 = Seurat@reductions$umap@cell.embeddings[,1],
UMAP_2= Seurat@reductions$umap@cell.embeddings[,2],
cnv= Seurat@meta.data$chr9q)
HTO= Seurat@meta.data$hash.ID
Max=max(Seurat@meta.data$chr9q)
Min=min(Seurat@meta.data$chr9q)
ggplot(Umap,aes(x=UMAP_1,y=UMAP_2))+geom_point(aes(color=as.factor(cnv),shape=HTO),size = 0.8)+ scale_color_manual(name = "Chr9q", labels = c("Deletion","Neutral","Amplification"),values=c("blue", "grey", "orange"))+geom_point(data = Umap[HTO_T1CD19n_in_tumor,],aes(x=UMAP_1,y=UMAP_2, fill = as.factor(cnv)),shape = 21,size = 3, color = "red")+ scale_fill_manual(values=c("blue", "grey", "orange"))
Interactive UMAP
### Plot an interactive UMAP
ggplotly(ggplot(Umap,aes(x=UMAP_1,y=UMAP_2))+geom_point(aes(color=as.factor(cnv),shape=HTO),size = 0.6)+ scale_color_manual(name = "Chr9q", labels = c("Deletion","Neutral","Amplification"),values=c("blue", "grey", "orange"))+geom_point(data = Umap[HTO_T1CD19n_in_tumor,],aes(x=UMAP_1,y=UMAP_2, fill = as.factor(cnv)),shape = 21,size = 2, color = "red")+ scale_fill_manual(values=c("blue", "grey", "orange")))
C0_chr9q_del_counts <- table(factor(Seurat@meta.data[which(Seurat@meta.data$RNA_snn_res.0.1 == "0"),]$chr9q, levels = -1:1))
C1_chr9q_del_counts <- table(Seurat@meta.data[which(Seurat@meta.data$RNA_snn_res.0.1 == "1"),]$chr9q)
C2345_chr9q_del_counts <- table(Seurat@meta.data[which(Seurat@meta.data$RNA_snn_res.0.1 == "2" | Seurat@meta.data$RNA_snn_res.0.1 == "3" | Seurat@meta.data$RNA_snn_res.0.1 == "4" | Seurat@meta.data$RNA_snn_res.0.1 =="5"),]$chr9q)
chr9q_table <- matrix(data = c(C0_chr9q_del_counts,C1_chr9q_del_counts,C2345_chr9q_del_counts), nrow = 3,ncol = 3 )
rownames(chr9q_table) <- c("Deletion","Neutral","Amplification")
colnames(chr9q_table) <- c("Cluster 0","Cluster 1","Clusters 2, 3, 4, 5")
chr9q_table
## Cluster 0 Cluster 1 Clusters 2, 3, 4, 5
## Deletion 872 640 1
## Neutral 262 152 988
## Amplification 0 2 2
Seurat20c <- subset(Seurat, cells = HTO_T1CD19n_in_tumor)
HTO_T1CD19n_in_tumor_chr9q_del_counts <- table(factor(Seurat20c@meta.data[,]$chr9q, levels = -1:1))
HTO_T1CD19n_in_tumor_chr9q_table <- matrix(data = c(HTO_T1CD19n_in_tumor_chr9q_del_counts), nrow = 3,ncol = 1 )
rownames(HTO_T1CD19n_in_tumor_chr9q_table) <- c("Deletion","Neutral","Amplification")
colnames(HTO_T1CD19n_in_tumor_chr9q_table) <- c("T1CD19neg cells in tumoral clusters (0 & 1)")
HTO_T1CD19n_in_tumor_chr9q_table
## T1CD19neg cells in tumoral clusters (0 & 1)
## Deletion 10
## Neutral 10
## Amplification 0
round(prop.table(chr9q_table,2)*100,2)
## Cluster 0 Cluster 1 Clusters 2, 3, 4, 5
## Deletion 76.9 80.60 0.1
## Neutral 23.1 19.14 99.7
## Amplification 0.0 0.25 0.2
## summarize large scale events
finalChrMat2 <- extractLargeScaleEvents (final.objects, thr=0.65) # less stringeant, increase the sensitivity but deacrease specificity, by default thr is est at 0.5
# Plot 9q deletion score
Seurat@meta.data$chr9q <- "nothing"
Seurat@meta.data$chr9q <- finalChrMat2[,"9q"]
Umap<-data.frame(
UMAP_1 = Seurat@reductions$umap@cell.embeddings[,1],
UMAP_2= Seurat@reductions$umap@cell.embeddings[,2],
cnv= Seurat@meta.data$chr9q
)
HTO= Seurat@meta.data$hash.ID
Max=max(Seurat@meta.data$chr9q)
Min=min(Seurat@meta.data$chr9q)
#Non interactive UMAP
ggplot(Umap,aes(x=UMAP_1,y=UMAP_2))+geom_point(aes(color=as.factor(cnv),shape=HTO),size = 0.8)+ scale_color_manual(name = "Chr9q", labels = c("Deletion","Neutral","Amplification"),values=c("blue", "grey", "orange"))+geom_point(data = Umap[HTO_T1CD19n_in_tumor,],aes(x=UMAP_1,y=UMAP_2, fill = as.factor(cnv)),shape = 21,size = 3, color = "red")+ scale_fill_manual(values=c("blue", "grey", "orange"))
Interactive UMAP
#Interactive UMAP
ggplotly(ggplot(Umap,aes(x=UMAP_1,y=UMAP_2))+geom_point(aes(color=as.factor(cnv),shape=HTO),size = 0.6)+ scale_color_manual(name = "Chr9q", labels = c("Deletion","Neutral","Amplification"),values=c("blue", "grey", "orange"))+geom_point(data = Umap[HTO_T1CD19n_in_tumor,],aes(x=UMAP_1,y=UMAP_2, fill = as.factor(cnv)),shape = 21,size = 2, color = "red")+ scale_fill_manual(values=c("blue", "grey", "orange")))
C0_chr9q_del_counts <- table(factor(Seurat@meta.data[which(Seurat@meta.data$RNA_snn_res.0.1 == "0"),]$chr9q, levels = -1:1))
C1_chr9q_del_counts <- table(Seurat@meta.data[which(Seurat@meta.data$RNA_snn_res.0.1 == "1"),]$chr9q)
C2345_chr9q_del_counts <- table(Seurat@meta.data[which(Seurat@meta.data$RNA_snn_res.0.1 == "2" | Seurat@meta.data$RNA_snn_res.0.1 == "3" | Seurat@meta.data$RNA_snn_res.0.1 == "4" | Seurat@meta.data$RNA_snn_res.0.1 =="5"),]$chr9q)
chr9q_table <- matrix(data = c(C0_chr9q_del_counts,C1_chr9q_del_counts,C2345_chr9q_del_counts), nrow = 3,ncol = 3 )
rownames(chr9q_table) <- c("Deletion","Neutral","Amplification")
colnames(chr9q_table) <- c("Cluster 0","Cluster 1","Clusters 2, 3, 4, 5")
chr9q_table
## Cluster 0 Cluster 1 Clusters 2, 3, 4, 5
## Deletion 1020 726 1
## Neutral 113 65 979
## Amplification 1 3 11
Seurat20c <- subset(Seurat, cells = HTO_T1CD19n_in_tumor)
HTO_T1CD19n_in_tumor_chr9q_del_counts <- table(factor(Seurat20c@meta.data[,]$chr9q, levels = -1:1))
HTO_T1CD19n_in_tumor_chr9q_table <- matrix(data = c(HTO_T1CD19n_in_tumor_chr9q_del_counts), nrow = 3,ncol = 1 )
rownames(HTO_T1CD19n_in_tumor_chr9q_table) <- c("Deletion","Neutral","Amplification")
colnames(HTO_T1CD19n_in_tumor_chr9q_table) <- c("T1CD19neg cells in tumoral clusters (0 & 1)")
HTO_T1CD19n_in_tumor_chr9q_table
## T1CD19neg cells in tumoral clusters (0 & 1)
## Deletion 16
## Neutral 4
## Amplification 0
round(prop.table(chr9q_table,2)*100,2)
## Cluster 0 Cluster 1 Clusters 2, 3, 4, 5
## Deletion 89.95 91.44 0.10
## Neutral 9.96 8.19 98.79
## Amplification 0.09 0.38 1.11
# Load bafextract results (here "cluster0_T1CD19neg_baf_mincov10_2_05", "cluster0_T1CD19neg_baf_mincov10", "cluster0_T1CD19neg_baf_mincov6" & "cluster0_T1CD19neg_baf")
loh_T1CD19n_in_tumor <- readBAFExtractOutput(path = OTHER_BAF_PATH, sequencing.type = "single-cell")
names(loh_T1CD19n_in_tumor) <- gsub(".snp","", names(loh_T1CD19n_in_tumor))
obj <- final.objects[[9]]
#Check for some specific BAF
## rs10900809
snp132490630_c0 <- obj@loh$cluster0_baf[which(obj@loh$cluster0_baf$chr == "5" & obj@loh$cluster0_baf$position == "132490630"),]
snp132490630_c1 <- obj@loh$cluster1_baf[which(obj@loh$cluster1_baf$chr == "5" & obj@loh$cluster1_baf$position == "132490630"),]
snp132490630_c2_3_4_5 <- obj@loh$clusters_2_3_4_5_baf[which(obj@loh$clusters_2_3_4_5_baf$chr == "5" & obj@loh$clusters_2_3_4_5_baf$position == "132490630"),]
snp_freq_table <- matrix(data = c(snp132490630_c0$alt,snp132490630_c1$alt,snp132490630_c2_3_4_5$alt,snp132490630_c0$ref,snp132490630_c1$ref,snp132490630_c2_3_4_5$ref,snp132490630_c0$coverage,snp132490630_c1$coverage,snp132490630_c2_3_4_5$coverage,snp132490630_c0$baf,snp132490630_c1$baf,snp132490630_c2_3_4_5$baf), nrow = 3,ncol = 4 )
rownames(snp_freq_table) <- c("Cluster 0","Cluster 1","Clusters 2,3,4,5")
colnames(snp_freq_table) <- c("Alt","Ref","Coverage","BAF")
snp_freq_table
## Alt Ref Coverage BAF
## Cluster 0 456 7 463 0.9848812
## Cluster 1 641 12 653 0.9816233
## Clusters 2,3,4,5 447 365 812 0.5504926
snp132490630_tumT1CD19neg <- loh_T1CD19n_in_tumor$cluster0_T1CD19neg_baf_mincov6[which(loh_T1CD19n_in_tumor$cluster0_T1CD19neg_baf_mincov6$chr == "5" & loh_T1CD19n_in_tumor$cluster0_T1CD19neg_baf_mincov6$position == "132490630"),]
rownames(snp132490630_tumT1CD19neg) <- "Tumoral T1CD19negative cells"
snp132490630_tumT1CD19neg[,3:6]
## alt ref coverage baf
## Tumoral T1CD19negative cells 8 0 8 1
snp150000930_c0 <- obj@loh$cluster0_baf[which(obj@loh$cluster0_baf$chr == "5" & obj@loh$cluster0_baf$position == "150000930"),]
snp150000930_c1 <- obj@loh$cluster1_baf[which(obj@loh$cluster1_baf$chr == "5" & obj@loh$cluster1_baf$position == "150000930"),]
snp150000930_c2_3_4_5 <- obj@loh$clusters_2_3_4_5_baf[which(obj@loh$clusters_2_3_4_5_baf$chr == "5" & obj@loh$clusters_2_3_4_5_baf$position == "150000930"),]
snp_freq_table <- matrix(data = c(snp150000930_c0$alt,snp150000930_c1$alt,snp150000930_c2_3_4_5$alt,snp150000930_c0$ref,snp150000930_c1$ref,snp150000930_c2_3_4_5$ref,snp150000930_c0$coverage,snp150000930_c1$coverage,snp150000930_c2_3_4_5$coverage,snp150000930_c0$baf,snp150000930_c1$baf,snp150000930_c2_3_4_5$baf), nrow = 3,ncol = 4 )
rownames(snp_freq_table) <- c("Cluster 0","Cluster 1","Clusters 2,3,4,5")
colnames(snp_freq_table) <- c("Alt","Ref","Coverage","BAF")
snp_freq_table
## Alt Ref Coverage BAF
## Cluster 0 333 1 334 0.9970060
## Cluster 1 259 4 263 0.9847909
## Clusters 2,3,4,5 62 51 113 0.5486726
### loh_T1CD19n_in_tumor$cluster0_T1CD19neg_baf_mincov6[which(loh_T1CD19n_in_tumor$cluster0_T1CD19neg_baf_mincov6$chr == "5" & loh_T1CD19n_in_tumor$cluster0_T1CD19neg_baf_mincov6$position == "150402064"),]
print("Less than 6 reads at chr5:150000930 for tumoral T1CD19neg cells")
## [1] "Less than 6 reads at chr5:150000930 for tumoral T1CD19neg cells"
snp150402064_c0 <- obj@loh$cluster0_baf[which(obj@loh$cluster0_baf$chr == "5" & obj@loh$cluster0_baf$position == "150402064"),]
snp150402064_c1 <- obj@loh$cluster1_baf[which(obj@loh$cluster1_baf$chr == "5" & obj@loh$cluster1_baf$position == "150402064"),]
snp150402064_c2_3_4_5 <- obj@loh$clusters_2_3_4_5_baf[which(obj@loh$clusters_2_3_4_5_baf$chr == "5" & obj@loh$clusters_2_3_4_5_baf$position == "150402064"),]
snp_freq_table <- matrix(data = c(snp150402064_c0$alt,snp150402064_c1$alt,snp150402064_c2_3_4_5$alt,snp150402064_c0$ref,snp150402064_c1$ref,snp150402064_c2_3_4_5$ref,snp150402064_c0$coverage,snp150402064_c1$coverage,snp150402064_c2_3_4_5$coverage,snp150402064_c0$baf,snp150402064_c1$baf,snp150402064_c2_3_4_5$baf), nrow = 3,ncol = 4 )
rownames(snp_freq_table) <- c("Cluster 0","Cluster 1","Clusters 2,3,4,5")
colnames(snp_freq_table) <- c("Alt","Ref","Coverage","BAF")
snp_freq_table
## Alt Ref Coverage BAF
## Cluster 0 1477 22 1499 0.9853235
## Cluster 1 1013 26 1039 0.9749759
## Clusters 2,3,4,5 706 709 1415 0.4989399
snp150402064_tumT1CD19neg <- loh_T1CD19n_in_tumor$cluster0_T1CD19neg_baf_mincov6[which(loh_T1CD19n_in_tumor$cluster0_T1CD19neg_baf_mincov6$chr == "5" & loh_T1CD19n_in_tumor$cluster0_T1CD19neg_baf_mincov6$position == "150402064"),]
rownames(snp150402064_tumT1CD19neg) <- "Tumoral T1CD19negative cells"
snp150402064_tumT1CD19neg[,3:6]
## alt ref coverage baf
## Tumoral T1CD19negative cells 7 0 7 1
snp150446963_c0 <- obj@loh$cluster0_baf[which(obj@loh$cluster0_baf$chr == "5" & obj@loh$cluster0_baf$position == "150446963"),]
snp150446963_c1 <- obj@loh$cluster1_baf[which(obj@loh$cluster1_baf$chr == "5" & obj@loh$cluster1_baf$position == "150446963"),]
snp150446963_c2_3_4_5 <- obj@loh$clusters_2_3_4_5_baf[which(obj@loh$clusters_2_3_4_5_baf$chr == "5" & obj@loh$clusters_2_3_4_5_baf$position == "150446963"),]
snp_freq_table <- matrix(data = c(snp150446963_c0$alt,snp150446963_c1$alt,snp150446963_c2_3_4_5$alt,snp150446963_c0$ref,snp150446963_c1$ref,snp150446963_c2_3_4_5$ref,snp150446963_c0$coverage,snp150446963_c1$coverage,snp150446963_c2_3_4_5$coverage,snp150446963_c0$baf,snp150446963_c1$baf,snp150446963_c2_3_4_5$baf), nrow = 3,ncol = 4 )
rownames(snp_freq_table) <- c("Cluster 0","Cluster 1","Clusters 2,3,4,5")
colnames(snp_freq_table) <- c("Alt","Ref","Coverage","BAF")
snp_freq_table
## Alt Ref Coverage BAF
## Cluster 0 5142 306 5448 0.9438326
## Cluster 1 5850 418 6268 0.9333121
## Clusters 2,3,4,5 4765 16797 21562 0.2209906
snp150446963_tumT1CD19neg <- loh_T1CD19n_in_tumor$cluster0_T1CD19neg_baf_mincov6[which(loh_T1CD19n_in_tumor$cluster0_T1CD19neg_baf_mincov6$chr == "5" & loh_T1CD19n_in_tumor$cluster0_T1CD19neg_baf_mincov6$position == "150446963"),]
rownames(snp150446963_tumT1CD19neg) <- "Tumoral T1CD19negative cells"
snp150446963_tumT1CD19neg[,3:6]
## alt ref coverage baf
## Tumoral T1CD19negative cells 80 7 87 0.9195402
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Debian GNU/Linux 10 (buster)
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.3.5.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=C
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid parallel stats4 stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] CaSpER_0.2.0 GOstats_2.52.0 graph_1.64.0
## [4] Category_2.52.1 Matrix_1.2-18 org.Hs.eg.db_3.10.0
## [7] GO.db_3.10.0 AnnotationDbi_1.48.0 Biobase_2.46.0
## [10] limma_3.42.2 biomaRt_2.42.1 ape_5.3
## [13] ggnetwork_0.5.8 intergraph_2.0-2 igraph_1.2.5
## [16] gridExtra_2.3 scales_1.1.0 mclust_5.4.6
## [19] reshape_0.8.8 RColorBrewer_1.1-2 pheatmap_1.0.12
## [22] signal_0.7-6 Rcpp_1.0.4.6 GenomicRanges_1.38.0
## [25] GenomeInfoDb_1.22.1 IRanges_2.20.2 S4Vectors_0.24.4
## [28] BiocGenerics_0.32.0 ggpubr_0.2.5 magrittr_1.5
## [31] plotly_4.9.2.1 ggplot2_3.3.2.9000 Seurat_3.1.5
##
## loaded via a namespace (and not attached):
## [1] BiocFileCache_1.10.2 plyr_1.8.6 lazyeval_0.2.2
## [4] GSEABase_1.48.0 splines_3.6.3 crosstalk_1.1.0.1
## [7] listenv_0.8.0 digest_0.6.25 htmltools_0.4.0
## [10] gdata_2.18.0 memoise_1.1.0 cluster_2.1.0
## [13] ROCR_1.0-7 globals_0.12.5 annotate_1.64.0
## [16] askpass_1.1 prettyunits_1.1.1 colorspace_1.4-1
## [19] blob_1.2.1 rappdirs_0.3.1 ggrepel_0.8.2
## [22] xfun_0.13 dplyr_0.8.5 crayon_1.3.4
## [25] RCurl_1.98-1.2 jsonlite_1.6.1 genefilter_1.68.0
## [28] survival_3.1-8 zoo_1.8-7 glue_1.4.0
## [31] gtable_0.3.0 zlibbioc_1.32.0 XVector_0.26.0
## [34] leiden_0.3.3 Rgraphviz_2.30.0 future.apply_1.5.0
## [37] DBI_1.1.0 viridisLite_0.3.0 xtable_1.8-4
## [40] progress_1.2.2 reticulate_1.15 bit_4.0.4
## [43] rsvd_1.0.3 tsne_0.1-3 AnnotationForge_1.28.0
## [46] htmlwidgets_1.5.1 httr_1.4.1 gplots_3.0.3
## [49] ellipsis_0.3.0 ica_1.0-2 farver_2.0.3
## [52] pkgconfig_2.0.3 XML_3.99-0.3 uwot_0.1.8
## [55] dbplyr_1.4.3 labeling_0.3 tidyselect_1.0.0
## [58] rlang_0.4.5 reshape2_1.4.4 munsell_0.5.0
## [61] tools_3.6.3 RSQLite_2.2.1 ggridges_0.5.2
## [64] evaluate_0.14 stringr_1.4.0 yaml_2.2.1
## [67] npsurv_0.4-0 knitr_1.28 bit64_4.0.5
## [70] fitdistrplus_1.0-14 caTools_1.18.0 purrr_0.3.4
## [73] RANN_2.6.1 pbapply_1.4-2 RBGL_1.62.1
## [76] future_1.17.0 nlme_3.1-144 compiler_3.6.3
## [79] curl_4.3 png_0.1-7 lsei_1.2-0
## [82] ggsignif_0.6.0 tibble_3.0.1 stringi_1.4.6
## [85] lattice_0.20-38 vctrs_0.2.4 pillar_1.4.3
## [88] lifecycle_0.2.0 lmtest_0.9-37 RcppAnnoy_0.0.16
## [91] data.table_1.12.8 cowplot_1.0.0 bitops_1.0-6
## [94] irlba_2.3.3 patchwork_1.0.0 R6_2.4.1
## [97] network_1.16.0 KernSmooth_2.23-16 codetools_0.2-16
## [100] MASS_7.3-51.5 gtools_3.8.2 assertthat_0.2.1
## [103] openssl_1.4.1 withr_2.2.0 sctransform_0.2.1
## [106] GenomeInfoDbData_1.2.2 hms_0.5.3 tidyr_1.0.2
## [109] rmarkdown_2.1 Rtsne_0.15